{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "9aa3c1b1-1587-45de-baaf-e0ccec1ddd8f",
   "metadata": {},
   "source": [
    "# LOADING CSV"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "573bbbff-e4e9-4a5e-a43f-8feb51d3124f",
   "metadata": {},
   "source": [
    "<table align=\"left\">\n",
    "  <td>\n",
    "    <a target=\"_blank\" href=\"https://colab.research.google.com/github/georgia-tech-db/eva/blob/master/tutorials/06-loading-structured-data.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /> Run on Google Colab</a>\n",
    "  </td>\n",
    "  <td>\n",
    "    <a target=\"_blank\" href=\"https://github.com/georgia-tech-db/eva/blob/master/tutorials/06-loading-structured-data.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /> View source on GitHub</a>\n",
    "  </td>\n",
    "  <td>\n",
    "    <a target=\"_blank\" href=\"https://raw.githubusercontent.com/georgia-tech-db/eva/master/tutorials/06-loading-structured-data.ipynb\"><img src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /> Download notebook</a>\n",
    "  </td>\n",
    "</table><br><br>"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "27d7ebfa-c0b6-4a15-bd17-59a4e1cb7c77",
   "metadata": {},
   "source": [
    "### Connect to EvaDB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c758b60b-e75e-4128-805d-46a210638daf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "%pip install --quiet \"evadb[vision,notebook]\"\n",
    "import evadb\n",
    "cursor = evadb.connect().cursor()\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "d7b7c9d5-2eae-4d57-a3f1-78792fca0b83",
   "metadata": {
    "tags": []
   },
   "source": [
    "### Create a Meta-data Table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "8897b9bb-0993-4eb0-959d-6484a651a90f",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "09-06-2023 16:06:48 WARNING[executor_utils:executor_utils.py:handle_if_not_exists:0086] Table: bddtestmeta already exists\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: []\n",
       "Index: []"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "create_table_query = f\"\"\"\n",
    "    CREATE TABLE IF NOT EXISTS bddtestmeta(\n",
    "        id INTEGER UNIQUE,\n",
    "        frame_id INTEGER,\n",
    "        video_id INTEGER,\n",
    "        dataset_name TEXT(30),\n",
    "        label TEXT(30),\n",
    "        bbox NDARRAY FLOAT32(4),\n",
    "        object_id INTEGER\n",
    "    );\n",
    "\"\"\"\n",
    "cursor.query(create_table_query).df()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "ca54c9e5-6ec9-4aea-b421-ad1fa3ddcdd1",
   "metadata": {},
   "source": [
    "### Download Berkeley Deepdrive Videos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "62def7ce-3f83-4fa0-b9fd-2e553b3919ba",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File ‘bddtest.zip’ already there; not retrieving.\n",
      "\n",
      "Archive:  bddtest.zip\n"
     ]
    }
   ],
   "source": [
    "# sample dataset of 2 videos\n",
    "!wget -nc \"https://www.dropbox.com/s/kg1q69ijbhjfecw/bddtest.zip\"\n",
    "\n",
    "# unzip\n",
    "!unzip -n bddtest.zip"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "574f8c04-c934-4654-8558-576e34e8735d",
   "metadata": {},
   "source": [
    "### Load 2 CSV files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "cf8415ac-f9e0-4bee-b2aa-b4e104b10a4b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CSV</th>\n",
       "      <th>Number of loaded frames</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>bddtest/info/bddtest_00a395fed60c0b47.csv</td>\n",
       "      <td>4355</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                         CSV  Number of loaded frames\n",
       "0  bddtest/info/bddtest_00a395fed60c0b47.csv                     4355"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cursor.query(\"LOAD CSV './bddtest/info/bddtest_00a2e3ca5c856cde.csv' INTO bddtestmeta\").df()\n",
    "cursor.query(\"LOAD CSV './bddtest/info/bddtest_00a395fed60c0b47.csv' INTO bddtestmeta\").df()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "463ce1f8-8333-4e11-ab0a-dda6d9bb067b",
   "metadata": {},
   "source": [
    "## Load the videos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "95b1e0a4-d9f5-40f2-830f-69c8c9f21172",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Number of loaded VIDEO: 1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           0\n",
       "0  Number of loaded VIDEO: 1"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cursor.query('DROP TABLE IF EXISTS bddtest_1;').df()\n",
    "cursor.query('DROP TABLE IF EXISTS bddtest_2;').df()\n",
    "\n",
    "cursor.query(\"LOAD VIDEO './bddtest/videos/bddtest_00a2e3ca5c856cde.mp4' INTO bddtest_1\").df()\n",
    "cursor.query(\"LOAD VIDEO './bddtest/videos/bddtest_00a395fed60c0b47.mp4' INTO bddtest_2\").df()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "8ace1640-fb0c-4e70-a595-f7e9befdf757",
   "metadata": {},
   "source": [
    "## Run a Query over the Video dataset\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "69e378d4-4d30-47cf-84c8-8fe56afe517e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bddtest_1.id</th>\n",
       "      <th>yolo.labels</th>\n",
       "      <th>yolo.bboxes</th>\n",
       "      <th>yolo.scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>[car, stop sign, car, car, car, car, car, car,...</td>\n",
       "      <td>[[490.39593505859375, 332.89349365234375, 716....</td>\n",
       "      <td>[0.93, 0.91, 0.88, 0.82, 0.77, 0.73, 0.72, 0.5...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>[car, car, stop sign, car, car, car, car, car,...</td>\n",
       "      <td>[[486.3235168457031, 331.76837158203125, 715.9...</td>\n",
       "      <td>[0.92, 0.87, 0.85, 0.77, 0.74, 0.7, 0.62, 0.56...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>[car, stop sign, car, car, car, car, car, car,...</td>\n",
       "      <td>[[481.6331481933594, 331.23248291015625, 715.7...</td>\n",
       "      <td>[0.93, 0.9, 0.87, 0.78, 0.77, 0.74, 0.57, 0.54...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   bddtest_1.id                                        yolo.labels  \\\n",
       "0             0  [car, stop sign, car, car, car, car, car, car,...   \n",
       "1             1  [car, car, stop sign, car, car, car, car, car,...   \n",
       "2             2  [car, stop sign, car, car, car, car, car, car,...   \n",
       "\n",
       "                                         yolo.bboxes  \\\n",
       "0  [[490.39593505859375, 332.89349365234375, 716....   \n",
       "1  [[486.3235168457031, 331.76837158203125, 715.9...   \n",
       "2  [[481.6331481933594, 331.23248291015625, 715.7...   \n",
       "\n",
       "                                         yolo.scores  \n",
       "0  [0.93, 0.91, 0.88, 0.82, 0.77, 0.73, 0.72, 0.5...  \n",
       "1  [0.92, 0.87, 0.85, 0.77, 0.74, 0.7, 0.62, 0.56...  \n",
       "2  [0.93, 0.9, 0.87, 0.78, 0.77, 0.74, 0.57, 0.54...  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = cursor.query(\"\"\"\n",
    "    SELECT id, Yolo(data)\n",
    "    FROM bddtest_1\n",
    "    WHERE id < 3\n",
    "\"\"\")\n",
    "query.df()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "55551f98-6e3f-4088-a02f-6852b56dfc63",
   "metadata": {},
   "source": [
    "## Run a query over the Meta-data Table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "2990038a-00ec-4d36-aae2-82a789f2389a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bddtestmeta._row_id</th>\n",
       "      <th>bddtestmeta.id</th>\n",
       "      <th>bddtestmeta.frame_id</th>\n",
       "      <th>bddtestmeta.video_id</th>\n",
       "      <th>bddtestmeta.dataset_name</th>\n",
       "      <th>bddtestmeta.label</th>\n",
       "      <th>bddtestmeta.bbox</th>\n",
       "      <th>bddtestmeta.object_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>8185</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>bddtest</td>\n",
       "      <td>car</td>\n",
       "      <td>[491.7783, 320.9238, 717.0901, 512.1478]</td>\n",
       "      <td>512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>8186</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>bddtest</td>\n",
       "      <td>car</td>\n",
       "      <td>[928.2679, 298.47574, 1279.5381, 571.17786]</td>\n",
       "      <td>513</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>8187</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>bddtest</td>\n",
       "      <td>car</td>\n",
       "      <td>[816.0277, 347.52887, 1029.6998, 474.7344]</td>\n",
       "      <td>514</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>8188</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>bddtest</td>\n",
       "      <td>car</td>\n",
       "      <td>[811.87067, 325.91223, 948.2217, 408.2217]</td>\n",
       "      <td>515</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>8189</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>bddtest</td>\n",
       "      <td>car</td>\n",
       "      <td>[673.0254, 325.08084, 751.17786, 367.48267]</td>\n",
       "      <td>516</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6797</th>\n",
       "      <td>11153</td>\n",
       "      <td>11581</td>\n",
       "      <td>1197</td>\n",
       "      <td>4</td>\n",
       "      <td>bddtest</td>\n",
       "      <td>car</td>\n",
       "      <td>[554.96533, 335.88916, 588.2217, 378.291]</td>\n",
       "      <td>643</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6798</th>\n",
       "      <td>11154</td>\n",
       "      <td>11582</td>\n",
       "      <td>1197</td>\n",
       "      <td>4</td>\n",
       "      <td>bddtest</td>\n",
       "      <td>car</td>\n",
       "      <td>[693.8106, 350.0231, 734.5497, 395.75058]</td>\n",
       "      <td>644</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6799</th>\n",
       "      <td>11155</td>\n",
       "      <td>11583</td>\n",
       "      <td>1197</td>\n",
       "      <td>4</td>\n",
       "      <td>bddtest</td>\n",
       "      <td>car</td>\n",
       "      <td>[683.83374, 345.03464, 733.71826, 389.93073]</td>\n",
       "      <td>642</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6800</th>\n",
       "      <td>11156</td>\n",
       "      <td>11584</td>\n",
       "      <td>1197</td>\n",
       "      <td>4</td>\n",
       "      <td>bddtest</td>\n",
       "      <td>car</td>\n",
       "      <td>[676.351, 346.69745, 699.6305, 384.11084]</td>\n",
       "      <td>641</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6801</th>\n",
       "      <td>11157</td>\n",
       "      <td>11585</td>\n",
       "      <td>1197</td>\n",
       "      <td>4</td>\n",
       "      <td>bddtest</td>\n",
       "      <td>car</td>\n",
       "      <td>[662.2171, 351.6859, 681.3395, 377.4596]</td>\n",
       "      <td>638</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>6802 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      bddtestmeta._row_id  bddtestmeta.id  bddtestmeta.frame_id  \\\n",
       "0                       1            8185                     0   \n",
       "1                       2            8186                     0   \n",
       "2                       3            8187                     0   \n",
       "3                       4            8188                     0   \n",
       "4                       5            8189                     0   \n",
       "...                   ...             ...                   ...   \n",
       "6797                11153           11581                  1197   \n",
       "6798                11154           11582                  1197   \n",
       "6799                11155           11583                  1197   \n",
       "6800                11156           11584                  1197   \n",
       "6801                11157           11585                  1197   \n",
       "\n",
       "      bddtestmeta.video_id bddtestmeta.dataset_name bddtestmeta.label  \\\n",
       "0                        4                  bddtest               car   \n",
       "1                        4                  bddtest               car   \n",
       "2                        4                  bddtest               car   \n",
       "3                        4                  bddtest               car   \n",
       "4                        4                  bddtest               car   \n",
       "...                    ...                      ...               ...   \n",
       "6797                     4                  bddtest               car   \n",
       "6798                     4                  bddtest               car   \n",
       "6799                     4                  bddtest               car   \n",
       "6800                     4                  bddtest               car   \n",
       "6801                     4                  bddtest               car   \n",
       "\n",
       "                                  bddtestmeta.bbox  bddtestmeta.object_id  \n",
       "0         [491.7783, 320.9238, 717.0901, 512.1478]                    512  \n",
       "1      [928.2679, 298.47574, 1279.5381, 571.17786]                    513  \n",
       "2       [816.0277, 347.52887, 1029.6998, 474.7344]                    514  \n",
       "3       [811.87067, 325.91223, 948.2217, 408.2217]                    515  \n",
       "4      [673.0254, 325.08084, 751.17786, 367.48267]                    516  \n",
       "...                                            ...                    ...  \n",
       "6797     [554.96533, 335.88916, 588.2217, 378.291]                    643  \n",
       "6798     [693.8106, 350.0231, 734.5497, 395.75058]                    644  \n",
       "6799  [683.83374, 345.03464, 733.71826, 389.93073]                    642  \n",
       "6800     [676.351, 346.69745, 699.6305, 384.11084]                    641  \n",
       "6801      [662.2171, 351.6859, 681.3395, 377.4596]                    638  \n",
       "\n",
       "[6802 rows x 8 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = cursor.query(\"\"\"\n",
    "    SELECT *\n",
    "    FROM bddtestmeta\n",
    "    WHERE bddtestmeta.video_id = 4\n",
    "\"\"\")\n",
    "query.df()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
